/*****************************************************************************************************************************************************/
/*  Project: Associations between risk factors and AD                                                                                                */
/*  Programmer: Longgang Zhao (lz7@email.sc.edu)                                                                *    ****                            */
/*  Date: May 16, 2002, version 1                                                                               *      *                             */
/*  Supervisor: Anwar T Merchant                                                                                *     *                              */
/*  Dataset: NHANES III (https://wwwn.cdc.gov/nchs/nhanes/nhanes3/default.aspx)                                 **** ****                            */
/*  Purpose 1: Define risk factor and find clusters uisng ML                                                                                         */
/*****************************************************************************************************************************************************/

/*------------------------------------------------------------------------------------------------------------*/
/**********************************Part I: macro and formats***************************************************/
/*------------------------------------------------------------------------------------------------------------*/

%macro getdetails(dataset,number=20);
options ls=max; proc contents data = &dataset varnum; run; proc print data = &dataset (obs = &number); run;
%mend;

%macro getlevels(dataset, var);
title "&dataset and &var"; proc sql; select count(distinct &var) as Levels, count(*) as Nobs from &dataset; quit; title;
%mend;

%macro histogram(dataset, var, format=5.2);
ods listing close;
title "&var";
ods select histogram;
proc univariate data = &dataset noprint; 
var &var;
histogram &var/vscale=count;
inset n='Sample size'(5.0) nmiss='Missing'(5.0) mean='Mean'(&format) std='Std Dev'(&format) median='Median'(&format) min='Min'(&format) max='Max'(&format);
format &var &format;
run;
title;
ods listing;
%mend;
%macro con(datain=,conlist=);
proc means min median max mean std n nmiss data = &datain; var &conlist; run;
%mend;
%macro cat(datain=,catlist=);
proc freq data = &datain; tables &catlist/norow nocol nopercent missing; run;
%mend;

/*------------------------------------------------------------------------------------------------------------*/
/**********************************Part IV: Univariate Analyses************************************************/
/*------------------------------------------------------------------------------------------------------------*/
libname dataloc "C:\Users\lz7\OneDrive - University of South Carolina\AD risk factors\Summer GA 2022\Data";
options fmtsearch=(dataloc);
data analyses; set dataloc.ADriskfactors220626;run;

%con(datain=analyses, conlist=Age bmi WC HEI SBP DBP WBC GBP CRP TCP LCP HDP TGP LPP scl_phone scl_together scl_neighb scl_church scl_meeting cognition);
%cat(datain=analyses, catlist=Agegp racegp sex edugp incomegp smkgp drinkgp bmigp rpagp diabetes hsCVD HP Cancer hsChol drug_hc anxiety depression yvisit perios scl_clubs ind_missing);

proc means data=analyses n nmiss min p10 p25 p50 p75 p90 max range; var cognition; run;

proc univariate data=analyses; var cognition; histogram cognition; run;

proc freq data=analyses; tables ind_missing; run;

data onlycogn; set analyses;
 if cognition^=.;
 if cognition<=10 then cogn_cat=1;
  else                  cogn_cat=0;
format cogn_cat yesno.;
run;

proc freq data=onlycogn; tables cogn_cat; run;

%con(datain=onlycogn, conlist=Age bmi WC HEI SBP DBP WBC GBP CRP TCP LCP HDP TGP LPP scl_phone scl_together scl_neighb scl_church scl_meeting cognition);

/*===========================================================*/
/*Table 1*/
%include "C:\Users\lz7\OneDrive - University of South Carolina\Tools\SASMacro\Macro_describe_perct.sas";
%describe(inputdata=onlycogn,exposure=cogn_cat,method=1,
            varlist=Age bmi WC HEI SBP DBP WBC CRP TCP HDP TGP scl_phone scl_together scl_neighb scl_church scl_meeting,
            catlist=Agegp racegp sex edugp incomegp smkgp drinkgp bmigp rpagp diabetes hsCVD HP Cancer hsChol drug_hc anxiety depression yvisit perios scl_clubs,
              digit=0.1);

/*categorical variables*/
%macro getOR(varlist=);
%if %sysfunc(exist(baseOR)) %then %do; proc delete data=baseOR; run; %end;
%if %sysfunc(exist(basep))  %then %do; proc delete data=basep;  run; %end;
%let varnumber=1;
%do %while (%scan(&varlist.,&varnumber.) ne );
%let var=%scan(&varlist.,&varnumber.);
proc logistic data = onlycogn;
 class Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary") hsChol(ref="NO")
       diabetes(ref="NO") hsCVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO") scl_clubs(ref="NO") hsChol(ref="NO") drug_hc(ref="NO")/param=ref;
 model cogn_cat (ref="NO") = &var;
 ods output OddsRatios=estimate ParameterEstimates=pvalue;
run;
data estimate2; set estimate;
 label=put(substr(Effect,1,20),$20.);
 OR=put(round(OddsRatioEst,0.01),4.2)||' ('||put(round(LowerCL,0.01),4.2)||'-'||put(round(UpperCL,0.01),4.2)||')';
 keep label OR;  
run;
data pvalue2; set pvalue;
label=put(substr(Variable,1,10),$10.);
if _n_ = 1 then delete;
if ProbChiSq < 0.001 then pvalue = "< 0.001";
 else if ProbChiSq < 0.01 then pvalue = put(round(ProbChiSq,0.001), 8.4);
  else                          pvalue = put(round(ProbChiSq,0.01), 8.3);
  keep label pvalue;
run;
%let varnumber=%eval(&varnumber.+1);
proc append base=baseOR data=estimate2 force;run;
proc append base=basep  data=pvalue2   force;run;
%end;
%mend;

%getOR(varlist=Agegp racegp sex edugp incomegp smkgp drinkgp bmigp rpagp diabetes hsCVD HP Cancer hsChol drug_hc anxiety depression yvisit perios scl_clubs); 
proc print data=baseOR; run; proc print data=basep; run;

/*continuous variables*/
%macro getOR(unit=,varlist=);
%if %sysfunc(exist(baseOR)) %then %do; proc delete data=baseOR; run; %end;
%if %sysfunc(exist(basep))  %then %do; proc delete data=basep;  run; %end;
%let varnumber=1;
%do %while (%scan(&varlist.,&varnumber.) ne );
%let var=%scan(&varlist.,&varnumber.);
proc logistic data = onlycogn;
 class Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary") hsChol(ref="NO")
       diabetes(ref="NO") hsCVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO") hsChol(ref="NO") drug_hc(ref="NO")/param=ref;
 model cogn_cat (ref="NO") = &var/clodds=wald;
 units &var=&unit;
 ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;
data estimate2; set estimate;
 label=put(substr(Effect,1,20),$20.);
 OR=put(round(OddsRatioEst,0.01),4.2)||' ('||put(round(LowerCL,0.01),4.2)||'-'||put(round(UpperCL,0.01),4.2)||')';
 keep label OR Unit;  
run;
data pvalue2; set pvalue;
label=put(substr(Variable,1,10),$10.);
if _n_ = 1 then delete;
if ProbChiSq < 0.001 then pvalue = "< 0.001";
 else if ProbChiSq < 0.01 then pvalue = put(round(ProbChiSq,0.001), 8.4);
  else                          pvalue = put(round(ProbChiSq,0.01), 8.3);
  keep label pvalue;
run;
%let varnumber=%eval(&varnumber.+1);
proc append base=baseOR data=estimate2 force;run;
proc append base=basep  data=pvalue2   force;run;
%end;
%mend;
%getOR(unit=5,varlist=Age bmi WC HEI); proc print data=baseOR; run; proc print data=basep; run;

%getOR(unit=10,varlist=SBP DBP); proc print data=baseOR; run; proc print data=basep; run;

%getOR(unit=SD,varlist=WBC CRP TCP HDP TGP); proc print data=baseOR; run; proc print data=basep; run;

%getOR(unit=1,varlist=scl_phone); proc print data=baseOR; run; proc print data=basep; run;

%getOR(unit=54,varlist=scl_together scl_neighb scl_church scl_meeting); proc print data=baseOR; run; proc print data=basep; run;


/*MV model*/
proc logistic data = onlycogn;
 class Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary") hsChol(ref="NO")
       diabetes(ref="NO") hsCVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO") hsChol(ref="NO") drug_hc(ref="NO")/param=ref;
 model cogn_cat (ref="NO") = Age bmi WC HEI SBP DBP WBC CRP TCP HDP TGP scl_phone scl_together scl_neighb scl_church scl_meeting
                             racegp sex edugp incomegp smkgp drinkgp rpagp diabetes hsCVD HP Cancer hsChol drug_hc anxiety depression yvisit perios/clodds=wald;
 units Age=5 bmi=5 WC=5 HEI=5 SBP=10 DBP=10 WBC=SD CRP=SD TCP=SD HDP=SD TGP=SD scl_phone=1 scl_together=54 scl_neighb=54 scl_church=54 scl_meeting=54;
ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;
data estimate2; set estimate;
 label=put(substr(Effect,1,20),$20.);
 OR=put(round(OddsRatioEst,0.01),4.2)||' ('||put(round(LowerCL,0.01),4.2)||'-'||put(round(UpperCL,0.01),4.2)||')';
 keep label OR units;  
run;
data pvalue2; set pvalue;
label=put(substr(Variable,1,10),$10.);
if _n_ = 1 then delete;
if ProbChiSq < 0.001 then pvalue = "< 0.001";
 else if ProbChiSq < 0.01 then pvalue = put(round(ProbChiSq,0.001), 8.4);
  else                          pvalue = put(round(ProbChiSq,0.01), 8.3);
  keep label pvalue;
run;
proc print data=estimate2; run; proc print data=pvalue2; run;

/*------------------------------------------------------------------------------------------------------------*/
/**********************************Part V: Model selection*****************************************************/
/*------------------------------------------------------------------------------------------------------------*/
%macro getmain(datain=estimate);
data estimate2; set &datain;
 label=put(substr(Effect,1,20),$20.);
 OR=put(round(OddsRatioEst,0.01),4.2)||' ('||put(round(LowerCL,0.01),4.2)||'-'||put(round(UpperCL,0.01),4.2)||')';
 keep label OR;  
run;
proc print; run;
%mend;
%macro getpvalue(datain=pvalue);
data pvalue2; set &datain;
label=put(substr(Variable,1,10),$10.);
if _n_ = 1 then delete;
if ProbChiSq < 0.001 then pvalue = "< 0.001";
 else if ProbChiSq < 0.01 then pvalue = put(round(ProbChiSq,0.001), 8.4);
  else                          pvalue = put(round(ProbChiSq,0.01), 8.3);
  keep label pvalue;
run; proc print data=pvalue2; run;
%mend;
data nomissing; set onlycogn;
array covar {*} Age bmi WC HEI SBP DBP WBC CRP TCP HDP TGP scl_phone scl_together scl_neighb scl_church scl_meeting 
                racegp sex edugp incomegp smkgp drinkgp rpagp diabetes hsCVD HP Cancer hsChol anxiety depression yvisit perios;
use_missing=0;
do i=1 to dim(covar);
 if covar{i} = . then use_missing=1;
end; drop i;
if use_missing=0;
run;
%con(datain=nomissing, conlist=Age bmi WC HEI SBP DBP WBC CRP TCP HDP TGP scl_phone scl_together scl_neighb scl_church scl_meeting cognition);
%cat(datain=nomissing, catlist=cogn_cat racegp sex edugp incomegp smkgp drinkgp rpagp diabetes hsCVD HP Cancer hsChol anxiety depression yvisit perios);


/*5.1 Full model*/
proc logistic data = nomissing;
 class Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary") hsChol(ref="NO")
       diabetes(ref="NO") hsCVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat (ref="NO") = Age bmi WC HEI SBP DBP WBC CRP TCP HDP TGP scl_phone scl_together scl_neighb scl_church scl_meeting
                             racegp sex edugp incomegp smkgp drinkgp rpagp diabetes hsCVD HP Cancer hsChol anxiety depression yvisit perios/
                             clodds=wald;
ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;
%getpvalue;

/*5.2 Forward*/
proc logistic data = nomissing;
 class Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary")
       diabetes(ref="NO") hsCVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat (ref="NO") = Age bmi WC HEI SBP DBP WBC CRP TCP HDP TGP scl_phone scl_together scl_neighb scl_church scl_meeting
                             racegp sex edugp incomegp smkgp drinkgp rpagp diabetes hsCVD HP Cancer hsChol anxiety depression yvisit perios/
                             clodds=wald selection=forward slentry=0.3;
ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;

/*5.3 Backward*/
proc logistic data = nomissing;
 class Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary")
       diabetes(ref="NO") hsCVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat (ref="NO") = Age bmi WC HEI SBP DBP WBC CRP TCP HDP TGP scl_phone scl_together scl_neighb scl_church scl_meeting
                             racegp sex edugp incomegp smkgp drinkgp rpagp diabetes hsCVD HP Cancer hsChol anxiety depression yvisit perios/
                             clodds=wald selection=backward slstay=0.1;
ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;

/*5.3 Stepwise*/
proc logistic data = nomissing;
 class Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary") hsChol(ref="NO")
       diabetes(ref="NO") hsCVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat (ref="NO") = Age bmi WC HEI SBP DBP WBC CRP TCP HDP TGP scl_phone scl_together scl_neighb scl_church scl_meeting
                             racegp sex edugp incomegp smkgp drinkgp rpagp diabetes hsCVD HP Cancer hsChol anxiety depression yvisit perios/
                             clodds=wald selection=stepwise slentry=0.3 slstay=0.1;
ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;

/*5.4 Stepwise-different entry level*/
proc logistic data = nomissing;
 class Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary") hsChol(ref="NO")
       diabetes(ref="NO") hsCVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat (ref="NO") = Age bmi WC HEI SBP DBP WBC CRP TCP HDP TGP scl_phone scl_together scl_neighb scl_church scl_meeting
                             racegp sex edugp incomegp smkgp drinkgp rpagp diabetes hsCVD HP Cancer hsChol anxiety depression yvisit perios/
                             clodds=wald selection=stepwise slentry=0.3 slstay=0.1;
ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;


/*5.5 SVM model*/
proc logistic data = nomissing;
 class Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary") hsChol(ref="NO")
       diabetes(ref="NO") hsCVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat (ref="NO") = Age anxiety CRP WBC scl_neighb racegp WC DBP scl_phone scl_together/
                             clodds=wald;
ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;


proc corr data = nomissing spearman noprob outp=corrtable;
 var Age bmi WC HEI SBP DBP WBC CRP TCP HDP TGP scl_phone scl_together scl_neighb scl_church scl_meeting
     racegp sex edugp incomegp smkgp drinkgp rpagp diabetes hsCVD HP Cancer HC anxiety depression yvisit perios;
run;

proc print data=corrtable; format _numeric_ f5.2; run;


/*------------------------------------------------------------------------------------------------------------*/
/**********************************Part V: Risk factor score   ************************************************/
/*------------------------------------------------------------------------------------------------------------*/
/*Stepwise*/
proc logistic data = nomissing;
 class Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary") hsChol(ref="NO")
       diabetes(ref="NO") hsCVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat (ref="NO") = Age bmi WC HEI SBP DBP WBC CRP TCP HDP TGP scl_phone scl_together scl_neighb scl_church scl_meeting
                             racegp sex edugp incomegp smkgp drinkgp rpagp diabetes hsCVD HP Cancer hsChol anxiety depression yvisit perios/
                             clodds=wald selection=stepwise slentry=0.3 slstay=0.1;
ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;
/*Scoring details based on model with AIC=2112.6
Age ---------------------------   0.057
Waist circumference -----------  -0.017 
HEI ---------------------------  -0.012
Social connect with phone -----  -0.020
Social connect with church ----  -0.002
Race with others --------------   0.660
Race with black ---------------   0.416
Education ---------------------  -0.710
Income high -------------------  -0.875
Income middle -----------------  -0.325
Physical activity moderately --  -0.172
Physical activity vigorously --   0.197
Diabetes ----------------------   0.324
Hypercholesterolemia ----------  -0.422
Annual visit of dentists ------  -0.542
************************************************/

data nomissing; set nomissing;
s_age        =  0.0057*Age;
s_WC         = -0.017*WC;
s_HEI        = -0.012*HEI;
s_scl_phone  = -0.020*scl_phone;
s_scl_church = -0.002*scl_church;

s_race = 0;
if racegp = 3 then s_race = 0.660;
 else if racegp = 2 then s_race = 0.416;

s_edu = 0;
if edugp = 2 then s_edu = -0.710;

s_income = 0;
if incomegp = 3 then s_income = -0.875;
 else if incomegp = 2 then s_income = -0.325;

s_pa = 0;
if rpagp = 2 then s_pa = -0.172;
 else if rpagp = 1 then s_pa = 0.197;

 s_dm = 0;
 if diabetes = 1 then s_dm = 0.324;

 s_hc = 0;
 if HC = 1 then s_hc = -0.422;

 s_yvisit = 0;
 if yvisit = 1 then s_yvisit = -0.542;

 riskscore = sum (s_age,s_WC,s_scl_phone,s_scl_church,s_race, s_edu, s_income, s_pa, s_dm, s_hc, s_yvisit);
 run;

proc univariate data=nomissing;
  var riskscore;
  histogram;
run;

proc rank data=nomissing out=nomissing group=3;
  var riskscore; ranks riskscoret;
run;

proc freq data=nomissing;
  tables riskscoret*cogn_cat/nocol nopercent;
run;

proc logistic data = nomissing;
 class cogn_cat(ref="NO") riskscoret(ref='0')/param=ref;
 model cogn_cat (ref="NO") = riskscoret/clodds=wald;
 ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;
%getmain; %getpvalue;
proc logistic data = nomissing;
 class cogn_cat(ref="NO") riskscoret(ref='0') racegp(ref="white") sex(ref="Male") /param=ref;
 model cogn_cat (ref="NO") = riskscoret Age racegp sex/clodds=wald;
 ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;
%getmain; %getpvalue;
/*------------------------------------------------------------------------------------------------------------*/
/**********************************Part VI: Read clusters      ************************************************/
/*------------------------------------------------------------------------------------------------------------*/

proc corr data=nomissing;
 var Red_Green Orange_Red Yellow_Orange Orange_Blue total_clusters cognition;
run;

proc means data=nomissing min median max mean std; 
 var Red_Green Orange_Red Yellow_Orange Orange_Blue total_clusters cognition; 
 class cogn_cat; 
run;

 proc rank data=nomissing out=nomissing group=3;
  var Red_Green Orange_Red Yellow_Orange Orange_Blue total_clusters; 
  ranks Red_Greent Orange_Redt Yellow_Oranget Orange_Bluet total_clusterst;
 run;

/*crude model*/
%macro adjustclst(cluster);
proc logistic data = nomissing;
 class cogn_cat(ref="NO") riskscoret(ref='0') Red_Greent(ref='0') Orange_Redt(ref='0') Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") 
       Red_Greent(ref='0') Orange_Redt(ref='0') Yellow_Oranget(ref='0') Orange_Bluet(ref='0') total_clusterst(ref='0')/param=ref;
 model cogn_cat (ref="NO") = riskscoret &cluster Age racegp sex/clodds=wald;
 ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;
%getmain; %getpvalue;
%mend;

%adjustclst(total_clusterst);
%adjustclst(Red_Greent);
%adjustclst(Orange_Redt);
%adjustclst(Yellow_Oranget);
%adjustclst(Orange_Bluet);


/*MV model*/
%adjustclst(total_clusterst);

/*only clusters*/
%macro clusteronly(cluster);
proc logistic data = nomissing;
 class cogn_cat(ref="NO") riskscoret(ref='0') Red_Greent(ref='0') Orange_Redt(ref='0') Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") 
       Red_Greent(ref='0') Orange_Redt(ref='0') Yellow_Oranget(ref='0') Orange_Bluet(ref='0') total_clusterst(ref='0')/param=ref;
 model cogn_cat (ref="NO") = &cluster /*Age racegp sex*//clodds=wald;
 ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;%getmain; %getpvalue;
%mend;
%clusteronly(total_clusterst);
%clusteronly(Red_Greent);
%clusteronly(Orange_Redt);
%clusteronly(Yellow_Oranget);
%clusteronly(Orange_Bluet);

%clusteronly(riskscoret);

proc freq data=nomissing; 
 tables riskscoret*total_clusterst/norow nopercent chisq;
run;
proc freq data=nomissing; 
 tables cogn_cat*total_clusterst/norow nopercent chisq;
run;


/*------------------------------------------------------------------------------------------------------------*/
/**********************************Part VI: SVM Risk factor score   *******************************************/
/*------------------------------------------------------------------------------------------------------------*/
proc logistic data = nomissing;
 class Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary") hsChol(ref="NO")
       diabetes(ref="NO") hsCVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat (ref="NO") = Age WC HEI DBP WBC HDP racegp edugp incomegp rpagp depression yvisit/
                             clodds=wald;
run;

/*Scoring details based on model with AIC=2112.6
Age ---------------------------   0.057
Waist circumference -----------  -0.016 
HEI ---------------------------  -0.015
DBP ---------------------------  -0.006
WBC ---------------------------  -0.026
HDP ---------------------------  -0.0004
Race with others --------------   0.734
Race with black ---------------   0.367
Education ---------------------  -0.756
Income high -------------------  -0.861
Income middle -----------------  -0.301
Physical activity moderately --  -0.157
Physical activity vigorously --   0.212
Depression --------------------   0.342
Annual visit of dentists ------  -0.601
************************************************/

data nomissing; set nomissing;
s_age        =  0.057*Age;
s_WC         = -0.016*WC;
s_HEI        = -0.015*HEI;
s_DBP        = -0.006*DBP;
s_WBC        = -0.026*WBC;
s_HDP        =-0.0004*HDP;

s_race = 0;
if racegp = 3 then s_race = 0.734;
 else if racegp = 2 then s_race = 0.367;

s_edu = 0;
if edugp = 2 then s_edu = -0.756;

s_income = 0;
if incomegp = 3 then s_income = -0.861;
 else if incomegp = 2 then s_income = -0.301;

s_pa = 0;
if rpagp = 2 then s_pa = -0.157;
 else if rpagp = 1 then s_pa = 0.212;

 s_de = 0;
 if depression = 1 then s_de = 0.342;

 s_yvisit = 0;
 if yvisit = 1 then s_yvisit = -0.601;

 riskscore = sum (s_age, s_WC, s_HEI, s_DBP, s_WBC, s_HDP, s_race, s_edu, s_income, s_pa, s_de, s_yvisit);
 run;

proc univariate data=nomissing;
  var riskscore;
  histogram;
run;

proc rank data=nomissing out=nomissing group=3;
  var riskscore; ranks riskscoret;
run;

proc freq data=nomissing;
  tables riskscoret*cogn_cat/nocol nopercent;
run;

proc logistic data = nomissing;
 class cogn_cat(ref="NO") riskscoret(ref='0')/param=ref;
 model cogn_cat (ref="NO") = riskscoret/clodds=wald;
 ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;
%getmain; %getpvalue;
proc logistic data = nomissing;
 class cogn_cat(ref="NO") riskscoret(ref='0') racegp(ref="white") sex(ref="Male") /param=ref;
 model cogn_cat (ref="NO") = riskscoret Age racegp sex/clodds=wald;
 ods output  CLoddsWald=estimate ParameterEstimates=pvalue;
run;
%getmain; %getpvalue;

















/*------------------------------------------------------------------------------------------------------------*/
/**********************************Part V: K-means cluster     ************************************************/
/*------------------------------------------------------------------------------------------------------------*/
%con(datain=onlycogn, conlist=Age bmi WC SBP DBP TGP scl_phone scl_church scl_clubs);
%cat(datain=onlycogn, catlist=Agegp racegp edugp incomegp smkgp drinkgp bmigp rpagp hsDM hsCVD hsHP yvisit perios);

data nomissing; set onlycogn;
array covar {*} Age bmi WC SBP DBP TGP scl_phone scl_church scl_clubs;
use_missing=0;
do i=1 to dim(covar);
 if covar{i} = . then use_missing=1;
end; drop i;
if use_missing=0;
keep SEQN Age bmi WC SBP DBP TGP scl_phone scl_church scl_clubs racegp edugp incomegp smkgp drinkgp rpagp hsDM hsCVD hsHP yvisit perios cognition cogn_cat;
run;


proc stdize data=nomissing out=nomissing_std method=range;
	var Age bmi WC SBP DBP TGP scl_phone scl_church scl_clubs racegp edugp incomegp smkgp drinkgp rpagp hsDM hsCVD hsHP yvisit perios;
run;

proc fastclus data=nomissing_std maxclusters=2 out=nomissing_scores;
	var Age bmi WC SBP DBP TGP scl_phone scl_church scl_clubs racegp edugp incomegp smkgp drinkgp rpagp hsDM hsCVD hsHP yvisit perios;
run;

proc princomp data=nomissing_scores plots(only)=(scree) out=Princomp_scores;
	var Age bmi WC SBP DBP TGP scl_phone scl_church scl_clubs racegp edugp incomegp smkgp drinkgp rpagp hsDM hsCVD hsHP yvisit perios;
run;


ods graphics / reset width=6.4in height=4.8in imagemap;
proc sgplot data=Princomp_scores;
	scatter x=Prin1 y=Prin2/ group=Cluster;
	xaxis grid;	yaxis grid;
run;
ods graphics / reset;


proc freq data=Princomp_scores;
 tables cogn_cat*Cluster/norow nopercent chisq;
run;

proc sort data = Princomp_scores; by Cluster; run;
proc g3d data=Princomp_scores;
	scatter Prin1*Prin2=Prin3/group Cluster;
run;



/*------------------------------------------------------------------------------------------------------------*/
/**********************************Part VI: Further cluster    ************************************************/
/*------------------------------------------------------------------------------------------------------------*/

data nomissing; set onlycogn;
array covar {*} Age bmi WC SBP DBP TGP scl_phone scl_church scl_clubs;
use_missing=0;
do i=1 to dim(covar);
 if covar{i} = . then use_missing=1;
end; drop i;
if use_missing=0;
/*keep SEQN Age bmi WC SBP DBP TGP scl_phone scl_church scl_clubs racegp edugp incomegp smkgp drinkgp rpagp diabetes Cancer HP yvisit perios cognition cogn_cat;*/
run;

proc stdize data=nomissing out=nomissing_std method=range;
	var Age bmi WC SBP DBP TGP scl_phone scl_church scl_clubs racegp edugp incomegp smkgp drinkgp rpagp diabetes Cancer HP yvisit perios;
run;

proc fastclus data=nomissing_std maxclusters=2 out=nomissing_scores;
	var Age bmi WC SBP DBP TGP scl_phone scl_church scl_clubs racegp edugp incomegp smkgp drinkgp rpagp diabetes Cancer HP yvisit perios;
run;


data newcluster; set nomissing_scores;
keep SEQN Cluster;
run;
proc sort data=newcluster; by SEQN; run;
proc sort data=nomissing;  by SEQN; run;
data usesample; merge newcluster nomissing; by SEQN; run;

proc freq data=usesample;
 tables cogn_cat*Cluster/norow nopercent chisq;
run;

proc univariate data=usesample;
 var cognition;
 class Cluster;
 histogram;
run;

proc logistic data=usesample plots(only)=roc;
 class Cluster (ref='1') Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary")
       diabetes(ref="NO") CVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat(ref='NO')=Cluster/clodds=wald;
run;

proc logistic data=usesample plots(only)=roc;
 class Cluster (ref='1') Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary")
       diabetes(ref="NO") CVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat(ref='NO')=Cluster Age racegp edugp incomegp smkgp drinkgp bmigp rpagp/clodds=wald;
run;

proc logistic data=usesample plots(only)=roc;
 class Cluster (ref='1') Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary")
       diabetes(ref="NO") CVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat(ref='NO')=Cluster Age racegp edugp incomegp smkgp drinkgp bmigp rpagp diabetes CVD HP Cancer HC anxiety depression/clodds=wald;
run;

proc logistic data=usesample plots(only)=roc;
 class Cluster (ref='1') Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary")
       diabetes(ref="NO") CVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat(ref='NO')=Cluster Age/clodds=wald;
run;


/*------------------------------------------------------------------------------------------------------------*/
/**********************************Part VII: Probability       ************************************************/
/*------------------------------------------------------------------------------------------------------------*/

proc logistic data=usesample plots(only)=roc;
 class Cluster (ref='1') Agegp(ref="40-<65") racegp(ref="white") sex(ref="Male") edugp(ref="1_<12 yrs") incomegp(ref="LOW_PIR=<1.3") 
       smkgp(ref="NonSmoker") drinkgp(ref="NonDrinker") bmigp(ref="NORMAL") rpagp(ref="Sedentary")
       diabetes(ref="NO") CVD(ref="NO") HP(ref="NO") Cancer(ref="NO") HC(ref="NO") anxiety(ref="NO") depression(ref="NO") 
       yvisit(ref="NO") perios(ref="No disease") cogn_cat(ref="NO")/param=ref;
 model cogn_cat(ref='NO')=Age racegp edugp incomegp smkgp drinkgp bmigp rpagp diabetes CVD HP Cancer HC anxiety depression/clodds=wald;
 output out=probability predicted=pred;
run;

proc means data=probability;
 var pred;
 class cogn_cat;
run;

/*transpose data*/
data testtrans; set usesample;
keep Age bmi WC SBP DBP TGP scl_phone scl_church scl_clubs racegp edugp incomegp smkgp drinkgp rpagp diabetes Cancer HP yvisit perios;
run;

proc transpose data=testtrans out=transdata;
run;

proc fastclus data=nomissing_std maxclusters=2 out=nomissing_scores;
run;




